---
title: "Analysis Code"
author: "Emma Berdan and Fabian Roger"
date: "1/16/2019"
output: html_notebook
---


```{r}
library(reshape)
library(phyloseq)
library(philr)
library(readr)
library(ape)
library(tidyverse)
library(dplyr)
library(ggplot2)
library(ggrepel)
library(RColorBrewer)
library(cowplot)
library(here)
library(vegan)
library(cowplot)
library(ggpubr)
library(biomformat)
library(glmnet)
library(ANCOMBC)
```

#Read in data from Fabian
```{r}
#load(here("Data", "fullWorkspace.RData"))

#metadata
meta <- read_tsv(here("Data", "meta.txt")) %>% 
  as.data.frame()

rownames(meta) <- meta$SampleID

#taxa table
taxa_clust <- read_tsv(here("Data", "Tax_table_99.txt"))

#ASV table
ASV_table_C_filt_df <-  
  read_tsv(here("Data", "ASV_table_99.txt")) %>% 
  pivot_longer(-seq) %>% 
  pivot_wider(names_from = seq,values_from = value)

ASV_table_C_filt<- as.matrix(ASV_table_C_filt_df[,-1])

rownames(ASV_table_C_filt) <- ASV_table_C_filt_df$name

#TREE
TREE <- read.tree(here("Data", "Tree"))

```

#make phyloseq
```{r}

colnames(meta) = c("SampleID" , "type" , "Population", "Genotype" ,  "Salinity" ,  "Year")  
meta$Year = as.factor(meta$Year)
meta = meta %>% mutate(Population = str_replace(Population, "Bibel", "Justoya"))

meta$Population = factor(meta$Population,levels=c("Skeie", "Justoya", "Magnarp", "Smygehuk", "Ystad"))


taxa_clust_filt <- filter(taxa_clust, seq %in% colnames(ASV_table_C_filt)) %>% as.data.frame()
rownames(taxa_clust_filt) = taxa_clust_filt$seq
taxa_clust_m <- as.matrix(taxa_clust_filt[,-1])
rownames(taxa_clust_m) <- taxa_clust_filt$seq

ps <- phyloseq(otu_table(ASV_table_C_filt, 
                         taxa_are_rows = FALSE), 
               tax_table(taxa_clust_m),
               sample_data(meta), phy_tree(TREE))
```

#Look at sample ordination

```{r}

#Look at wrack


ps_wrack <- prune_samples(as.character(filter(meta, type == "wrack")$SampleID), ps)
sample_data(ps_wrack)$SampleID[1:4] = c("Justøya-1-1","Justøya-1-2", "Justøya-2-1", "Justøya-3-2")

site_labels = c("Skeie", "Justøya", "Magnarp", "Smygehuk", "Ystad")

GP.ordW <- ordinate(ps_wrack, "NMDS", "bray")

plot_ordination(ps_wrack, GP.ordW, type="samples", color="Population")+
  geom_text_repel(aes(label = SampleID), size = 3, alpha = 0.8)+
  scale_color_brewer(palette = "Set1",labels=c("Skeie", "Justøya", "Magnarp", "Smygehuk", "Ystad")) + theme_bw()



#Look at Larvae
ps_larvae <- prune_samples(as.character(filter(meta, type == "larvae")$SampleID), ps)

GP.ordL <- ordinate(ps_larvae, "NMDS", "bray")



plot_ordination(ps_larvae, GP.ordL, type="samples", color="Population", shape="Year")+
  geom_text_repel(aes(label = SampleID), size = 3, alpha = 0.8)+
  scale_color_brewer(palette = "Set1")+
  theme_bw()


#make PDFs


pdf("wrack_ordination.pdf",width=10,height=8)

plot_ordination(ps_wrack, GP.ordW, type="samples", color="Population")+
  geom_text_repel(aes(label = SampleID), size = 5, alpha = 0.8)+
  scale_color_brewer(palette = "Set1",labels=c("Skeie", "Justøya", "Magnarp", "Smygehuk", "Ystad"))+   labs(color = "Site") + theme(legend.position = "none",
    axis.text.x = element_text(size=14), 
    axis.title.x = element_text(size=16), 
    axis.text.y = element_text(size=14), 
    axis.title.y= element_text(size=16), 
    legend.text= element_text(size=14), 
    legend.title= element_text(size=16),
    strip.text= element_text(size=16),text = element_text(size = 16) ) + geom_point(size = 4) +
  theme_bw()
dev.off()

pdf("Initial_larval_ordination.pdf",width=10,height=8)
plot_ordination(ps_larvae, GP.ordL, type="samples", color="Population", shape="Year")+
  scale_color_brewer(palette = "Set1",labels=c("Skeie", "Justøya", "Magnarp", "Smygehuk", "Ystad"))+ labs(color = "Site") +
  theme_bw()
dev.off()




```

#removing wrack technical replicates and export all info
```{r}
rowSums(ASV_table_C_filt)

#to remove are Magnarp-1-2, Smyg-1-2, Bibel-1-1, Skeie-2-1,and Ystad-1-1 
#also removing Skeie-3-2 as this was clearly mislabeled

ps2 = subset_samples(ps, SampleID != "Magnarp-1-2" & SampleID != "Smyg-1-2" & SampleID != "Ystad-1-1" & SampleID != "Skeie-3-2" & SampleID != "Bibel-1-1" & SampleID != "Skeie-2-1")

OTU = t(otu_table(ps2))
tax = as.data.frame(tax_table(ps2))

all = merge(tax, OTU, by=0)

write.table(all, here("Data", "OTU_table_and_tax_info.csv"), sep=",")


#overall ordination


GP.ord <- ordinate(ps2, "NMDS", "bray")

plot_ordination(ps2, GP.ord, type="samples", shape="type",color="Population")+
  scale_color_brewer(palette = "Set1")+
  theme_bw()

pdf("Initial_ordination.pdf",width=10,height=8)
plot_ordination(ps2, GP.ord, type="samples", shape="type",color="Population")+
  scale_color_brewer(palette = "Set1",labels=c("Skeie", "Justøya", "Magnarp", "Smygehuk", "Ystad"))+  labs(color = "Site") +
  theme_bw() + theme(
    axis.text.x = element_text(size=14), 
    axis.title.x = element_text(size=16), 
    axis.text.y = element_text(size=14), 
    axis.title.y= element_text(size=16), 
    legend.text= element_text(size=14), 
    legend.title= element_text(size=16),
    strip.text= element_text(size=16),text = element_text(size = 16)) + geom_point(size = 4) 
dev.off()


pdf("Initial_salinity_ordination.pdf",width=10,height=8)
plot_ordination(ps2, GP.ord, type="samples", shape="type",color="Salinity") + labs(color = "Salinity") + theme(text = element_text(size = 16)) + geom_point(size = 4) + 
  theme_bw()
dev.off()

```

#make rarefaction curves
```{r}
#for wrack

step = 1000 #set to smaller value (e.g. 100 for smoother plots)

ps_wrack <- prune_samples(as.character(filter(meta, type == "wrack")$SampleID), ps2)

wrack = rarecurve((otu_table(ps_wrack)), step=step,  cex=0.5)


pdf("rare_wrack.pdf",width=12,height=8)
rarecurve(otu_table(ps_wrack), step=step, cex=0.5)
dev.off()

#for larvae
ps_larvae <- prune_samples(as.character(filter(meta, type == "larvae")$SampleID), ps2)
ps_larvae_noY5 = subset_samples(ps_larvae, SampleID != "Y5") #remove Y5 as it has almost double reads compared to other samples

larvae = rarecurve(otu_table(ps_larvae), step=step, cex=0.5) #sample is min number of OTUs found in the data set

larvae2 = rarecurve(otu_table(ps_larvae_noY5), step=step, cex=0.5) #sample is min number of OTUs found in the data set



pdf("rare_larvae.pdf",width=12,height=8)
rarecurve(otu_table(ps_larvae_noY5), step=step, cex=0.5)
dev.off()

```



#calculate effective number of species
```{r}
#library(reshape)


ps_alpha_div <- estimate_richness(ps2, split = TRUE, measure = "Shannon")

ps_alpha_div$Effective = exp(ps_alpha_div$Shannon)
ps_alpha_div$SampleID <- rownames(ps_alpha_div) %>%
  as.factor() 

sample_data(ps2)$Baltic <- factor(get_variable(ps2, "Population") %in% c("Ystad", "Smygehuk" ))

x = sample_data(ps2)
rownames(x) = x$SampleID
x$type = as.factor(x$type)

x$Population = as.factor(x$Population)

levels(x$type) = c("Larvae", "Wrack")


ps_samp <- x %>%
  unclass() %>%
  data.frame() %>%
  left_join(ps_alpha_div)

ps_samp = ps_samp %>% mutate(Population = str_replace(Population, "Bibel", "Justoya"))

ps_samp$Population = factor(ps_samp$Population,levels=c("Skeie", "Justoya", "Magnarp", "Smygehuk", "Ystad"))

p = ggplot(ps_samp) +
  geom_point(aes(x = Population, y = Effective, color=type
                 ),size = 5, alpha=0.65) +
  facet_wrap(~type) +
  labs(x = "Population", y = "Effective Number of Species") +  theme_bw() + theme(legend.position = "none",
    axis.text.x = element_text(size=14), 
    axis.title.x = element_text(size=16), 
    axis.text.y = element_text(size=14), 
    axis.title.y= element_text(size=16), 
    legend.text= element_text(size=14), 
    legend.title= element_text(size=16),
    strip.text= element_text(size=16)) + scale_x_discrete(labels=c("Skeie", "Justøya", "Magnarp", "Smygehuk", "Ystad"))


pdf("Effective_num_sp.pdf",width=12,height=8)
p
dev.off()

#ttest

t.test(x=ps_samp[ps_samp$type=="Larvae",]$Effective,y=ps_samp[ps_samp$type=="Wrack",]$Effective)
```



#extract basic sample information
```{r}
#pull sample information
samples = sample_data(ps2)

#get counts across all ASVs
samples$counts = rowSums(otu_table(ps2))

#get number of ASVs per sample with >1 count
samples$ASVs = rowSums(otu_table(ps2) > 1)

#Get shannon diversity
samples$shannon = ps_alpha_div$Shannon

#write table
write.table(samples, here("Data", "full_sample_info.csv"))
```


#make phylum make-up bar plot
```{r}
#check phylum level abundances
phyfac = factor(tax_table(ps2)[, "Phylum"])
gentab = apply(otu_table(ps2), MARGIN = 1, function(x) {
  tapply(x, INDEX = phyfac, FUN = sum, na.rm = TRUE, simplify = TRUE)
})

#remove phyla that make up less than 1% of overall abundances 

filterPhyla = c("Acidobacteriota","Armatimonadota", 
                "Bdellovibrionota", 
                "Calditrichota",
                "Chloroflexi",
                "Cloacimonadota","Cyanobacteria", "Deferribacterota","Deinococcota", "Elusimicrobiota", "Fibrobacterota", "Fusobacteriota", "Gemmatimonadota",  "Hydrogenedentes", "Myxococcota", "NB1-j", "Planctomycetota",  "Sumerlaeota", "Synergistota",  "WPS-2"
                )


ps2_filt = subset_taxa(ps2, !Phylum %in% filterPhyla)

rownames(ps_samp) = ps_samp$SampleID
sample_data(ps2_filt) = ps_samp

pstest = tax_glom(ps2_filt,taxrank="Phylum")


levels(sample_data(ps2_filt))$Population = c("Skeie", "Justoya", "Magnarp", "Smygehuk", "Ystad")
levels(sample_data(ps2_filt)$type) = c("Larvae", "Wrack")

#rareify to even depths
set.seed(2814532)
ps3R = rarefy_even_depth(ps2_filt, sample.size = 9500)
ps3RP = tax_glom(ps3R,taxrank="Phylum")
sample_data(ps3RP)$typepop <- paste(sample_data(ps3RP)$type,sample_data(ps3RP)$Population)
sample_data(ps3RP)$Population = as.factor(sample_data(ps3RP)$Population)
sample_data(ps3RP)$type = as.factor(sample_data(ps3RP)$type)

ps3Rm = merge_samples(ps3RP, "typepop")

sample_data(ps2_filt)$Population = as.factor(sample_data(ps2_filt)$Population)

sample_data(ps3Rm)$Population <- levels(sample_data(ps2_filt)$Population)
sample_data(ps3Rm)$typ = c("Larvae","Larvae","Larvae","Larvae","Larvae", "Wrack", "Wrack", "Wrack", "Wrack", "Wrack")


ps3ra = transform_sample_counts(ps3Rm, function(x) x / sum(x))
sample_data(ps3ra)$Population = factor(rev(unique(sample_data(ps3ra)$Population)), levels=c("Skeie", "Justoya", "Magnarp", "Smygehuk", "Ystad"), labels = c("Skeie","Justøya" ,"Magnarp", "Smygehuk", "Ystad"))



levels(sample_data(ps3ra)$Population) <- paste0(" \n", levels(sample_data(ps3ra)$Population) , "\n ")

#make phylum level plot

pdf("bar_plot.pdf",width=18,height=8)
plot_bar(ps3ra, x="typ", fill="Phylum") + facet_wrap(~Population) + coord_flip() + 
  ylab("Proportion of Sequences") + 
  geom_bar(aes( fill=Phylum), stat="identity", color="black", position="stack") +  theme_bw() +
  labs(y = "Proportion of Sequences", x = "Sample Type", color = "Phylum") + theme(
    axis.text.x = element_text(size=18), 
    axis.title.x = element_text(size=20), 
    axis.text.y = element_text(size=18), 
    axis.title.y= element_text(size=20), 
    legend.text= element_text(size=18), 
    legend.title= element_text(size=20),
    strip.text= element_text(size=20, margin = margin(.01, 0, .01, 0, "cm"))) + 
  scale_fill_brewer(palette = "Paired") 
dev.off()


#Make plots looking at different frequencies in Baltic vs. North Sea
#Only doing this for taxa groups that differ

#Phylum level plot 

prop = as.data.frame(otu_table(pstest))
prop$total = rowSums(prop[,1:10])
colnames(prop) = c("Desulfobacterota","Firmicutes","Actinobacteriota","Patescibacteria","Dependentiae","Verrucomicrobiota","Spirochaetota","Campylobacterota","Bacteroidota","Proteobacteria", "total")

prop$type = as.data.frame(sample_data(pstest))$type
prop$baltic = as.data.frame(sample_data(pstest))$Baltic


info=NULL 

for (i in seq(1,10,1)) {

propLB = subset(prop, prop$type=="Larvae"&prop$baltic=="TRUE")
propLNB = subset(prop, prop$type=="Larvae"&prop$baltic=="FALSE")
propWB = subset(prop, prop$type=="Wrack"&prop$baltic=="TRUE")
propWNB = subset(prop, prop$type=="Wrack"&prop$baltic=="FALSE")

LBMean = mean(propLB[[i]]/propLB[[11]])
LNBMean = mean(propLNB[[i]]/propLNB[[11]])
WBMean = mean(propWB[[i]]/propWB[[11]])
WNBMean = mean(propWNB[[i]]/propWNB[[11]])


LBsterr = sd(propLB[[i]]/propLB[[11]])/sqrt(27)
WBsterr = sd(propWB[[i]]/propWB[[11]])/sqrt(6)

LNBsterr = sd(propLNB[[i]]/propLNB[[11]])/sqrt(47)
WNBsterr = sd(propWNB[[i]]/propWNB[[11]])/sqrt(9)

test = c(colnames(prop)[[i]], LBMean, LBsterr, LNBMean, LNBsterr, WBMean, WBsterr, WNBMean,WNBsterr)
info = rbind(info, test)
  
}  

colnames(info) = c("Phylum", "Larval_baltic_mean", "Larval_baltic_sterr","Larval_Nbaltic_mean", "Larval_Nbaltic_sterr", "Wrack_baltic_mean", "Wrack_baltic_sterr", "Wrack_Nbaltic_mean", "Wrack_Nbaltic_sterr")
 


info = as.data.frame(info, row.names = info[,1])

info2 = t(info)
info2 = as.data.frame(info2[-1,])
info2$Type = c(rep("Larvae",4), rep("Wrack", 4))
info2$Baltic = c(rep("Baltic",2),rep("North Sea", 2),rep("Baltic",2),rep("North Sea", 2))
info2$metric = c("mean","sterr","mean","sterr","mean","sterr","mean","sterr")

means = subset(info2, metric=="mean")
sterr = subset(info2, metric=="sterr")
colnames(sterr) = paste(colnames(sterr),"err",sep="_")
all = cbind(means,sterr)

all$Desulfobacterota=as.numeric(as.character(all$Desulfobacterota))
all$Desulfobacterota_err=as.numeric(as.character(all$Desulfobacterota_err))
all$Actinobacteriota=as.numeric(as.character(all$Actinobacteriota))
all$Actinobacteriota_err=as.numeric(as.character(all$Actinobacteriota_err))


A = ggplot(all, aes(x=Baltic, y=Desulfobacterota, color=Type)) + geom_point(size=2)+ geom_line() + labs(y = "Proportion of Sequences", x = "Location", color = "Type of Sample", title="Desulfobacterota") + geom_errorbar( aes(ymin=Desulfobacterota-Desulfobacterota_err, ymax=Desulfobacterota+Desulfobacterota_err)) + theme(
    axis.text.x = element_text(size=18), 
    axis.title.x = element_text(size=20), 
    axis.text.y = element_text(size=18), 
    axis.title.y= element_text(size=20), 
    legend.text= element_text(size=18), 
    legend.title= element_text(size=20)) + theme_bw()

B = ggplot(all, aes(x=Baltic, y=Actinobacteriota, color=Type)) + geom_point(size=2)+ geom_line() + labs(y = "Proportion of Sequences", x = "Location", color = "Type of Sample", title="Actinobacteriota") + geom_errorbar( aes(ymin=Actinobacteriota-Actinobacteriota_err, ymax=Actinobacteriota+Actinobacteriota_err)) + theme(
    axis.text.x = element_text(size=18), 
    axis.title.x = element_text(size=20), 
    axis.text.y = element_text(size=18), 
    axis.title.y= element_text(size=20), 
    legend.text= element_text(size=18), 
    legend.title= element_text(size=20)) + theme_bw()
  
  
pdf("phylum_interactions.pdf",width=5,height=8)
plot_grid(A,B, ncol =1, align = "hv", labels=c("A","B"))
dev.off()
  
  
#Bacteroidota plot

ps2_bact = subset_taxa(ps2, Class=="Bacteroidia")

phyfac = factor(tax_table(ps2_bact)[, "Order"])
gentab = apply(otu_table(ps2_bact), MARGIN = 1, function(x) {
  tapply(x, INDEX = phyfac, FUN = sum, na.rm = TRUE, simplify = TRUE)
})

#keep everything 1% or more of total 
OrdertoKeep = c("Bacteroidales","Chitinophagales", "Cytophagales","Flavobacteriales"
                )
ps2_bact_filt = subset_taxa(ps2_bact, Order %in% OrdertoKeep)
ps2_bact_filt = tax_glom(ps2_bact_filt,taxrank="Order")

prop2 = as.data.frame(otu_table(ps2_bact_filt))

prop2$total = rowSums(prop2[,1:4])
colnames(prop2) = c("Chitinophagales", "Bacteroidales","Flavobacteriales","Cytophagales", "total")

prop2$type = as.data.frame(sample_data(pstest))$type
prop2$baltic = as.data.frame(sample_data(pstest))$Baltic


info=NULL 

for (i in seq(1,4,1)) {

propLB = subset(prop2, prop2$type=="Larvae"&prop2$baltic=="TRUE")
propLNB = subset(prop2, prop2$type=="Larvae"&prop2$baltic=="FALSE")
propWB = subset(prop2, prop2$type=="Wrack"&prop2$baltic=="TRUE")
propWNB = subset(prop2, prop2$type=="Wrack"&prop2$baltic=="FALSE")

LBMean = mean(propLB[[i]]/propLB[[5]])
LNBMean = mean(propLNB[[i]]/propLNB[[5]])
WBMean = mean(propWB[[i]]/propWB[[5]])
WNBMean = mean(propWNB[[i]]/propWNB[[5]])


LBsterr = sd(propLB[[i]]/propLB[[5]])/sqrt(27)
WBsterr = sd(propWB[[i]]/propWB[[5]])/sqrt(6)

LNBsterr = sd(propLNB[[i]]/propLNB[[5]])/sqrt(47)
WNBsterr = sd(propWNB[[i]]/propWNB[[5]])/sqrt(9)

test = c(colnames(prop2)[[i]], LBMean, LBsterr, LNBMean, LNBsterr, WBMean, WBsterr, WNBMean,WNBsterr)
info = rbind(info, test)
  
}  

colnames(info) = c("Order", "Larval_baltic_mean", "Larval_baltic_sterr","Larval_Nbaltic_mean", "Larval_Nbaltic_sterr", "Wrack_baltic_mean", "Wrack_baltic_sterr", "Wrack_Nbaltic_mean", "Wrack_Nbaltic_sterr")
 


info = as.data.frame(info, row.names = info[,1])

info2 = t(info)
info2 = as.data.frame(info2[-1,])
info2$Type = c(rep("Larvae",4), rep("Wrack", 4))
info2$Baltic = c(rep("Baltic",2),rep("North Sea", 2),rep("Baltic",2),rep("North Sea", 2))
info2$metric = c("mean","sterr","mean","sterr","mean","sterr","mean","sterr")

means = subset(info2, metric=="mean")
sterr = subset(info2, metric=="sterr")
colnames(sterr) = paste(colnames(sterr),"err",sep="_")
all = cbind(means,sterr)
all = as.data.frame(all)

all$Flavobacteriales=as.numeric(as.character(all$Flavobacteriales))
all$Flavobacteriales_err=as.numeric(as.character(all$Flavobacteriales_err))

flavo = ggplot(all, aes(x=Baltic, y=Flavobacteriales, color=Type)) + geom_point(size=2)+ geom_line() + labs(y = "Proportion of Sequences", x = "Location", color = "Type of Sample", title="Flavobacteriales") + geom_errorbar(aes(ymin=Flavobacteriales-Flavobacteriales_err, ymax=Flavobacteriales+Flavobacteriales_err)) + theme(
    axis.text.x = element_text(size=18), 
    axis.title.x = element_text(size=20), 
    axis.text.y = element_text(size=18), 
    axis.title.y= element_text(size=20), 
    legend.text= element_text(size=18), 
    legend.title= element_text(size=20)) + theme_bw()


#proteobacteria plots

ps2_prot = subset_taxa(ps2, Phylum=="Proteobacteria")
OrdertoKeep = c("Burkholderiales","Enterobacterales", "Legionellales","Pseudomonadales","Rhizobiales", "Rhodobacterales","Sphingomonadales","Xanthomonadales"
                )
ps2_prot_filt = subset_taxa(ps2_prot, Order %in% OrdertoKeep)
ps2_prot_filt =tax_glom(ps2_prot_filt, "Order")
prop3 = as.data.frame(otu_table(ps2_prot_filt))

prop3$total = rowSums(prop3[,1:8])
colnames(prop3) = c("Burkholderiales","Enterobacterales", "Legionellales","Pseudomonadales","Rhizobiales", "Rhodobacterales","Sphingomonadales","Xanthomonadales", "total")

prop3$type = as.data.frame(sample_data(pstest))$type
prop3$baltic = as.data.frame(sample_data(pstest))$Baltic

info=NULL 

for (i in seq(1,8,1)) {

propLB = subset(prop3, prop3$type=="Larvae"&prop3$baltic=="TRUE")
propLNB = subset(prop3, prop3$type=="Larvae"&prop3$baltic=="FALSE")
propWB = subset(prop3, prop3$type=="Wrack"&prop3$baltic=="TRUE")
propWNB = subset(prop3, prop3$type=="Wrack"&prop3$baltic=="FALSE")

LBMean = mean(propLB[[i]]/propLB[[9]])
LNBMean = mean(propLNB[[i]]/propLNB[[9]])
WBMean = mean(propWB[[i]]/propWB[[9]])
WNBMean = mean(propWNB[[i]]/propWNB[[9]])


LBsterr = sd(propLB[[i]]/propLB[[9]])/sqrt(27)
WBsterr = sd(propWB[[i]]/propWB[[9]])/sqrt(6)

LNBsterr = sd(propLNB[[i]]/propLNB[[9]])/sqrt(47)
WNBsterr = sd(propWNB[[i]]/propWNB[[9]])/sqrt(9)

test = c(colnames(prop3)[[i]], LBMean, LBsterr, LNBMean, LNBsterr, WBMean, WBsterr, WNBMean,WNBsterr)
info = rbind(info, test)
  
}  

colnames(info) = c("Order", "Larval_baltic_mean", "Larval_baltic_sterr","Larval_Nbaltic_mean", "Larval_Nbaltic_sterr", "Wrack_baltic_mean", "Wrack_baltic_sterr", "Wrack_Nbaltic_mean", "Wrack_Nbaltic_sterr")
 


info = as.data.frame(info, row.names = info[,1])

info2 = t(info)
info2 = as.data.frame(info2[-1,])
info2$Type = c(rep("Larvae",4), rep("Wrack", 4))
info2$Baltic = c(rep("Baltic",2),rep("North Sea", 2),rep("Baltic",2),rep("North Sea", 2))
info2$metric = c("mean","sterr","mean","sterr","mean","sterr","mean","sterr")

means = subset(info2, metric=="mean")
sterr = subset(info2, metric=="sterr")
colnames(sterr) = paste(colnames(sterr),"err",sep="_")
all = cbind(means,sterr)
all = as.data.frame(all)

all$Sphingomonadales=as.numeric(as.character(all$Sphingomonadales))
all$Sphingomonadales_err=as.numeric(as.character(all$Sphingomonadales_err))


all$Enterobacterales=as.numeric(as.character(all$Enterobacterales))
all$Enterobacterales_err=as.numeric(as.character(all$Enterobacterales_err))

sphing = ggplot(all, aes(x=Baltic, y=Sphingomonadales, color=Type)) + geom_point(size=2)+ geom_line() + labs(y = "Proportion of Sequences", x = "Location", color = "Type of Sample", title="Sphingomonadales") + geom_errorbar(aes(ymin=Sphingomonadales-Sphingomonadales_err, ymax=Sphingomonadales+Sphingomonadales_err)) + theme(
    axis.text.x = element_text(size=18), 
    axis.title.x = element_text(size=20), 
    axis.text.y = element_text(size=18), 
    axis.title.y= element_text(size=20), 
    legend.text= element_text(size=18), 
    legend.title= element_text(size=20)) + theme_bw()

entero = ggplot(all, aes(x=Baltic, y=Enterobacterales, color=Type)) + geom_point(size=2)+ geom_line() + labs(y = "Proportion of Sequences", x = "Location", color = "Type of Sample", title="Enterobacterales") + geom_errorbar(aes(ymin=Enterobacterales-Enterobacterales_err, ymax=Enterobacterales+Enterobacterales_err)) + theme(
    axis.text.x = element_text(size=18), 
    axis.title.x = element_text(size=20), 
    axis.text.y = element_text(size=18), 
    axis.title.y= element_text(size=20), 
    legend.text= element_text(size=18), 
    legend.title= element_text(size=20)) + theme_bw()

#Graph proteobacteria and bacteroidota together
  
pdf("order_interactions.pdf",width=5,height=12)
plot_grid(flavo,sphing, entero, ncol =1, align = "hv", labels=c("A","B", "C"))
dev.off()
```




#Look at proteobacteria classes and orders as relative proportions
```{r}

ps2_prot = subset_taxa(ps2, Phylum=="Proteobacteria")


rownames(ps_samp) = ps_samp$SampleID
sample_data(ps2_prot) = ps_samp

levels(sample_data(ps2_prot))$Population = c("Skeie", "Justoya", "Magnarp", "Smygehuk", "Ystad")
levels(sample_data(ps2_prot)$type) = c("Larvae", "Wrack")

#rareify to even depths
set.seed(2814532)
ps3R = rarefy_even_depth(ps2_prot, sample.size = 9500)
ps3RP = tax_glom(ps3R,taxrank="Class")
sample_data(ps3RP)$typepop <- paste(sample_data(ps3RP)$type,sample_data(ps3RP)$Population)
sample_data(ps3RP)$Population = as.factor(sample_data(ps3RP)$Population)
sample_data(ps3RP)$type = as.factor(sample_data(ps3RP)$type)

ps3Rm = merge_samples(ps3RP, "typepop")

sample_data(ps2_prot)$Population = as.factor(sample_data(ps2_prot)$Population)

sample_data(ps3Rm)$Population <- levels(sample_data(ps2_prot)$Population)
sample_data(ps3Rm)$typ = c("Larvae","Larvae","Larvae","Larvae","Larvae", "Wrack", "Wrack", "Wrack", "Wrack", "Wrack")


ps3ra = transform_sample_counts(ps3Rm, function(x) x / sum(x))

custom_col21 = c("#771155","#AA4488", "#CC99BB", "#114477", "#4477AA", "#77AADD", "#117777", "#44AAAA", "#77CCCC", "#117744", "#44AA77", "#88CCAA", "#777711", "#AAAA44", "#DDDD77", "#774411", "#AA7744", "#DDAA77", "#771122", "#AA4455", "#DD7788")
custom_col12 = c("#771155", "#CC99BB",  "#4477AA","#117777", "#77CCCC",  "#44AA77",  "#777711",  "#DDDD77",  "#AA7744",  "#771122","#AA4455", "#DD7788")


levels(sample_data(ps3ra)$Population) <- paste0(" \n", levels(sample_data(ps3ra)$Population) , "\n ")



p4 = plot_bar(ps3ra, x="typ", fill="Class") + facet_wrap(~Population) + coord_flip() + 
  ylab("Proportion of Sequences") + 
  geom_bar(aes( fill=Class), stat="identity", color="black", position="stack") +
  labs(x = "Proportion of Sequences", y = "Sample Type", color = "Class") + theme(
    axis.text.x = element_text(size=14), 
    axis.title.x = element_text(size=16), 
    axis.text.y = element_text(size=14), 
    axis.title.y= element_text(size=16), 
    legend.text= element_text(size=14), 
    legend.title= element_text(size=16),
    strip.text= element_text(size=16, margin = margin(.1, 0, .1, 0, "cm"))) + 
  theme_bw()

pdf("proteo_bar_plot.pdf",width=16,height=8)
plot_bar(ps3ra, x="typ", fill="Class") + facet_wrap(~Population) + coord_flip() + 
  ylab("Proportion of Sequences") + 
  geom_bar(aes( fill=Class), stat="identity", color="black", position="stack") +
  labs(y = "Proportion of Sequences", x = "Sample Type", color = "Class") + theme(
    axis.text.x = element_text(size=18), 
    axis.title.x = element_text(size=20), 
    axis.text.y = element_text(size=18), 
    axis.title.y= element_text(size=20), 
    legend.text= element_text(size=18), 
    legend.title= element_text(size=20),
    strip.text= element_text(size=24, margin = margin(.1, 0, .1, 0, "cm"))) + 
  scale_fill_brewer(palette = "Paired") + theme_bw()
dev.off()


## same but looking at Orders instead of classes
phyfac = factor(tax_table(ps2_prot)[, "Order"])
gentab = apply(otu_table(ps2_prot), MARGIN = 1, function(x) {
  tapply(x, INDEX = phyfac, FUN = sum, na.rm = TRUE, simplify = TRUE)
})

OrdertoKeep = c("Burkholderiales","Enterobacterales", "Legionellales","Pseudomonadales","Rhizobiales", "Rhodobacterales","Sphingomonadales","Xanthomonadales"
                )
ps2_prot_filt = subset_taxa(ps2_prot, Order %in% OrdertoKeep)

set.seed(2814532)
ps3R = rarefy_even_depth(ps2_prot_filt, sample.size = 9500)
ps3RP = tax_glom(ps3R,taxrank="Order")
sample_data(ps3RP)$typepop <- paste(sample_data(ps3RP)$type,sample_data(ps3RP)$Population)
sample_data(ps3RP)$Population = as.factor(sample_data(ps3RP)$Population)
sample_data(ps3RP)$type = as.factor(sample_data(ps3RP)$type)

ps3Rm = merge_samples(ps3RP, "typepop")


sample_data(ps3Rm)$Population <- levels(sample_data(ps2_prot)$Population)
sample_data(ps3Rm)$typ = c("Larvae","Larvae","Larvae","Larvae","Larvae", "Wrack", "Wrack", "Wrack", "Wrack", "Wrack")


ps3ra = transform_sample_counts(ps3Rm, function(x) x / sum(x))


levels(sample_data(ps3ra)$Population) <- paste0(" \n", levels(sample_data(ps3ra)$Population) , "\n ")




pdf("proteo_bar_plot_order.pdf",width=16,height=8)
plot_bar(ps3ra, x="typ", fill="Order") + facet_wrap(~Population) + coord_flip() + 
  ylab("Proportion of Sequences") +   
  scale_fill_brewer(palette = "Paired")  +
  geom_bar(aes(fill=Order), stat="identity", color="black", position="stack") +
  labs(y = "Proportion of Sequences", x = "Sample Type", color = "Order") +  theme_bw() + theme(
    axis.text.x = element_text(size=18), 
    axis.title.x = element_text(size=20), 
    axis.text.y = element_text(size=18), 
    axis.title.y= element_text(size=20), 
    legend.text= element_text(size=18), 
    legend.title= element_text(size=20),
    strip.text= element_text(size=24, margin = margin(.1, 0, .1, 0, "cm")))
dev.off()
```


#Look at bacteriodetes orders for relative proportions
```{r}
#check phylum level abundances

ps2_bact = subset_taxa(ps2, Class=="Bacteroidia")

phyfac = factor(tax_table(ps2_bact)[, "Order"])
gentab = apply(otu_table(ps2_bact), MARGIN = 1, function(x) {
  tapply(x, INDEX = phyfac, FUN = sum, na.rm = TRUE, simplify = TRUE)
})

OrdertoKeep = c("Bacteroidales","Chitinophagales", "Cytophagales","Flavobacteriales"
                )
ps2_bact_filt = subset_taxa(ps2_bact, Order %in% OrdertoKeep)




rownames(ps_samp) = ps_samp$SampleID
sample_data(ps2_bact_filt) = ps_samp

levels(sample_data(ps2_bact_filt))$Population = c("Skeie", "Justoya", "Magnarp", "Smygehuk", "Ystad")
levels(sample_data(ps2_bact_filt)$type) = c("Larvae", "Wrack")

#rareify to even depths
set.seed(2814532)
ps3R = rarefy_even_depth(ps2_bact_filt, sample.size = 9500)
ps3RP = tax_glom(ps3R,taxrank="Order")
sample_data(ps3RP)$typepop <- paste(sample_data(ps3RP)$type,sample_data(ps3RP)$Population)
sample_data(ps3RP)$Population = as.factor(sample_data(ps3RP)$Population)
sample_data(ps3RP)$type = as.factor(sample_data(ps3RP)$type)

ps3Rm = merge_samples(ps3RP, "typepop")

sample_data(ps2_bact)$Population = as.factor(sample_data(ps2_bact)$Population)

sample_data(ps3Rm)$Population <- levels(sample_data(ps2_bact)$Population)
sample_data(ps3Rm)$typ = c("Larvae","Larvae","Larvae","Larvae","Larvae", "Wrack", "Wrack", "Wrack", "Wrack", "Wrack")


ps3ra = transform_sample_counts(ps3Rm, function(x) x / sum(x))


sample_data(ps3ra)$Population = factor(rev(unique(sample_data(ps3ra)$Population)), levels=c("Skeie", "Justoya", "Magnarp", "Smygehuk", "Ystad"), labels = c("Skeie","Justøya" ,"Magnarp", "Smygehuk", "Ystad"))

levels(sample_data(ps3ra)$Population) <- paste0(" \n", levels(sample_data(ps3ra)$Population) , "\n ")


pdf("bact_bar_plot.pdf",width=16,height=8)
plot_bar(ps3ra, x="typ", fill="Order") + facet_wrap(~Population) + coord_flip() + 
  ylab("Proportion of Sequences") +  theme_bw() +
  geom_bar(aes( fill=Order), stat="identity", color="black", position="stack") +
  labs(y = "Proportion of Sequences", x = "Sample Type", color = "Order") + theme(
    axis.text.x = element_text(size=18), 
    axis.title.x = element_text(size=20), 
    axis.text.y = element_text(size=18), 
    axis.title.y= element_text(size=20), 
    legend.text= element_text(size=18), 
    legend.title= element_text(size=20),
    strip.text= element_text(size=24, margin = margin(.1, 0, .1, 0, "cm"))) + 
  scale_fill_brewer(palette = "Paired") 
dev.off()

```



#Run Philr for all samples
```{r}

#remove low prev otus
Abundance = taxa_sums(ps2)
prev = apply(X = otu_table(ps2), MARGIN = ifelse(taxa_are_rows(ps2), yes = 1, no = 2), FUN = function(x){sum(x > 0)})

prevdf = data.frame(Prevalence = prev,  TotalAbundance = Abundance, tax_table(ps2))
ps3 <-  filter_taxa(ps2, function(x) sd(x)/mean(x) > 3.0, TRUE)

keepTaxa = rownames(prevdf)[(prevdf$Prevalence >= 2 & prevdf$TotalAbundance >= 5)]
ps3 = prune_taxa(keepTaxa, ps3)
ps3 <- transform_sample_counts(ps3, function(x) x+1)

#check and prepare for philr
is.rooted(phy_tree(ps3))
is.binary.tree(phy_tree(ps3))
phy_tree(ps3) <- makeNodeLabel(phy_tree(ps3), method="number", prefix='n')
name.balance(phy_tree(ps3), tax_table(ps3), 'n1')
otu.table <- (otu_table(ps3))
tree <- phy_tree(ps3)
metadata <- sample_data(ps3)
tax <- tax_table(ps3)
otu.table[1:2,1:2]

#run philr and ordinate
gp.philr <- philr(otu.table, tree, 
                  part.weights='enorm.x.gm.counts', 
                  ilr.weights='blw.sqrt')
gp.philr[1:5,1:5]
gp.dist <- dist(gp.philr, method="euclidean")
gp.pcoa <- ordinate(ps3, 'PCoA', distance=gp.dist)
plot_ordination(ps3, gp.pcoa, color='Population', shape="type") + geom_point(size=4)


#look for balances that identify baltic/non-baltic
sample_data(ps3)$Baltic <- factor(get_variable(ps3, "Population") %in% c("Ystad", "Smygahuk" ))

cv.glmnet(gp.philr, sample_data(ps3)$Baltic, type.measure="class", family="binomial")  #determine best lambda

glmmod <- glmnet(gp.philr, sample_data(ps3)$Baltic, alpha=1, family="binomial")



top.coords <- as.matrix(coefficients(glmmod, s=0.01602)) #using lambda from above
top.coords <- rownames(top.coords)[which(top.coords != 0)]
(top.coords <- top.coords[2:length(top.coords)]) # remove the intercept as a coordinate

#top coords are n16,n2152, n2153, n2167, n2715, n2727, n2729, n3557

tc.names <- sapply(top.coords, function(x) name.balance(tree, tax, x))
tc.names


#visualize balances

#Prep data 
sample_data(ps3)$typepop <- paste(sample_data(ps3)$type,sample_data(ps3)$Baltic, sep="_")
gp.philr.long <- convert_to_long(gp.philr, get_variable(ps3, 'typepop')) %>%
  filter(coord %in% top.coords)

info = str_split_fixed(gp.philr.long$labels, "_", 2)
gp.philr.long$type = info[,1]
gp.philr.long$baltic = info[,2]

gp.philr.long = gp.philr.long %>% mutate(baltic = str_replace(baltic, "FALSE", "North Sea"))
gp.philr.long = gp.philr.long %>% mutate(baltic = str_replace(baltic, "TRUE", "Baltic"))
colnames(gp.philr.long)[5] = "Type"
gp.philr.long = gp.philr.long %>% mutate(Type = str_replace(Type, "larvae", "Larvae"))
gp.philr.long = gp.philr.long %>% mutate(Type = str_replace(Type, "wrack", "Wrack"))


#Plot top balances
ggplot(gp.philr.long, aes(x=baltic, y=value, color=Type)) +
  geom_boxplot(fill='lightgrey') +
  facet_wrap(.~coord, scales='free_x',nrow = 2,labeller = labeller(coord = tc.names, facet_category = label_wrap_gen(width = 16))) +
  xlab('Location') + ylab('Balance Value') + 
  theme_bw()  



pdf("philr_balances.pdf",width=10,height=6)
ggplot(gp.philr.long, aes(x=baltic, y=value, color=Type)) +
  geom_boxplot(fill='lightgrey') +
  facet_wrap(.~coord, scales='free_x',nrow = 2) +
  xlab('Location') + ylab('Balance Value') + theme_bw() + theme(legend.position = "bottom",
    axis.text.x = element_text(size=14), 
    axis.title.x = element_text(size=16), 
    axis.text.y = element_text(size=14), 
    axis.title.y= element_text(size=16), 
    legend.text= element_text(size=14), 
    legend.title= element_text(size=16),
    strip.text= element_text(size=16)) 

dev.off()



#get numerators and denominators for balances
votes <- name.balance(tree, tax, 'n16', return.votes = c('up', 'down'))
votes[[c('up.votes', 'Phylum')]]
votes[[c('down.votes', 'Phylum')]]

votes <- name.balance(tree, tax, 'n2152', return.votes = c('up', 'down'))
votes[[c('up.votes', 'Class')]]
votes[[c('down.votes', 'Class')]]

votes <- name.balance(tree, tax, 'n2153', return.votes = c('up', 'down'))
votes[[c('up.votes', 'Class')]]
votes[[c('down.votes', 'Class')]]

votes <- name.balance(tree, tax, 'n2167', return.votes = c('up', 'down'))
votes[[c('up.votes', 'Order')]]
votes[[c('down.votes', 'Order')]]


votes <- name.balance(tree, tax, 'n2167', return.votes = c('up', 'down'))
votes[[c('up.votes', 'Order')]]
votes[[c('down.votes', 'Order')]]

votes <- name.balance(tree, tax, 'n2715', return.votes = c('up', 'down'))
votes[[c('up.votes', 'Species')]]
votes[[c('down.votes', 'Species')]]



votes <- name.balance(tree, tax, 'n2727', return.votes = c('up', 'down'))
votes[[c('up.votes', 'Species')]]
votes[[c('down.votes', 'Species')]]


votes <- name.balance(tree, tax, 'n2729', return.votes = c('up', 'down'))
votes[[c('up.votes', 'Genus')]]
votes[[c('down.votes', 'Genus')]]

votes <- name.balance(tree, tax, 'n3557', return.votes = c('up', 'down'))
votes[[c('up.votes', 'Genus')]]
votes[[c('down.votes', 'Genus')]]

```



#ADONIS on philr output overall
```{r}
#use gp.dist

info = as.character(samples$Baltic)
adonis(gp.dist ~ info,permutations=999,method="euclidean")

```



#Run philr for larvae only
```{r}
ps_larvae <- prune_samples(as.character(filter(meta, type == "larvae")$SampleID), ps2)

Abundance = taxa_sums(ps_larvae)
prev = apply(X = otu_table(ps_larvae), MARGIN = ifelse(taxa_are_rows(ps_larvae), yes = 1, no = 2), FUN = function(x){sum(x > 0)})

prevdf = data.frame(Prevalence = prev,  TotalAbundance = Abundance, tax_table(ps_larvae))
ps3L <-  filter_taxa(ps_larvae, function(x) sd(x)/mean(x) > 3.0, TRUE)

keepTaxa = rownames(prevdf)[(prevdf$Prevalence >= 2 & prevdf$TotalAbundance >= 5)]
ps3L = prune_taxa(keepTaxa, ps3L)
ps3L <- transform_sample_counts(ps3L, function(x) x+1)

is.rooted(phy_tree(ps3L))
is.binary.tree(phy_tree(ps3L))
phy_tree(ps3L) <- makeNodeLabel(phy_tree(ps3L), method="number", prefix='n')
name.balance(phy_tree(ps3L), tax_table(ps3L), 'n1')
otu.table <- (otu_table(ps3L))
tree <- phy_tree(ps3L)
metadata <- sample_data(ps3L)
tax <- tax_table(ps3L)
otu.table[1:2,1:2]


gp.philr <- philr(otu.table, tree, 
                  part.weights='enorm.x.gm.counts', 
                  ilr.weights='blw.sqrt')
gp.philr[1:5,1:5]
gp.dist <- dist(gp.philr, method="euclidean")
gp.pcoa <- ordinate(ps3L, 'PCoA', distance=gp.dist)
plot_ordination(ps3L, gp.pcoa, color='Genotype', shape="Population") + geom_point(size=4)

#make lables with greek letters
greeks = c(expression(paste(alpha,alpha)), expression(paste(alpha,beta)), expression(paste(beta,beta)), "Genotype Unavailable")

#recreate default ggplot colors
ggplotColours <- function(n = 6, h = c(0, 360) + 15){
  if ((diff(h) %% 360) < 1) h[2] <- h[2] - 360/n
  hcl(h = (seq(h[1], h[2], length = n)), c = 100, l = 65)
}

pdf("philr_larvae_geno.pdf",width=6,height=4)
plot_ordination(ps3L, gp.pcoa, color="Population",shape="Genotype") + geom_point(size=3, alpha=0.9) + scale_shape_manual(values=c(15,16,17),labels=greeks, na.value = 18)
dev.off()


larvae = plot_ordination(ps3L, gp.pcoa, color='Population')  + geom_point(size=5, alpha=0.9)  + theme_bw() + theme(legend.position = "none")


pdf("philr_larvae_pop.pdf",width=5,height=4)
plot_ordination(ps3L, gp.pcoa, color='Population')  + geom_point(size=5, alpha=0.9) 
dev.off()



```



#Run philr for wrack
```{r}

ps_wrack <- prune_samples(as.character(filter(meta, type == "wrack")$SampleID), ps2)

Abundance = taxa_sums(ps_wrack)
prev = apply(X = otu_table(ps_wrack), MARGIN = ifelse(taxa_are_rows(ps_wrack), yes = 1, no = 2), FUN = function(x){sum(x > 0)})

prevdf = data.frame(Prevalence = prev,  TotalAbundance = Abundance, tax_table(ps_wrack))
ps3L <-  filter_taxa(ps_wrack, function(x) sd(x)/mean(x) > 3.0, TRUE)

keepTaxa = rownames(prevdf)[(prevdf$Prevalence >= 2 & prevdf$TotalAbundance >= 5)]
ps3L = prune_taxa(keepTaxa, ps3L)
ps3L <- transform_sample_counts(ps3L, function(x) x+1)

is.rooted(phy_tree(ps3L))
is.binary.tree(phy_tree(ps3L))
phy_tree(ps3L) <- makeNodeLabel(phy_tree(ps3L), method="number", prefix='n')
name.balance(phy_tree(ps3L), tax_table(ps3L), 'n1')
otu.table <- (otu_table(ps3L))
tree <- phy_tree(ps3L)
metadata <- sample_data(ps3L)
tax <- tax_table(ps3L)
otu.table[1:2,1:2]


gp.philr <- philr(otu.table, tree, 
                  part.weights='enorm.x.gm.counts', 
                  ilr.weights='blw.sqrt')
gp.philr[1:5,1:5]
gp.dist <- dist(gp.philr, method="euclidean")

gp.pcoa <- ordinate(ps3L, 'PCoA', distance=gp.dist)
wrack= plot_ordination(ps3L, gp.pcoa, color='Population') + geom_point(size=4) + theme_bw() + theme(legend.position = "none") 



pdf("philr_wrack.pdf",width=5,height=4)
plot_ordination(ps3L, gp.pcoa, color='Population') + geom_point(size=5, alpha=0.9)
dev.off()

#output both wrack and larvae philr as 1 figure

legend = plot_ordination(ps3L, gp.pcoa, color='Population') + geom_point(size=5, alpha=0.9) + labs(color = "Site") + scale_color_discrete(labels=c("Skeie", "Justøya", "Magnarp", "Smygehuk", "Ystad"))

legend_b2 <- get_legend(legend +theme(legend.direction="horizontal",legend.position="bottom"))


pdf("philr_separated.pdf",width=5,height=8)
plot_grid(wrack,larvae,legend_b2, ncol =1, align = "hv", labels=c("A","B"), rel_heights = c(1,1,0.2))
dev.off()


```



#export data to run FAPROTAX
```{r}

tax<-as(tax_table(ps2),"matrix")
tax_cols <- colnames(tax)
tax<-as.data.frame(tax)
tax$taxonomy<-do.call(paste, c(tax[tax_cols], sep=";"))
for(co in tax_cols) tax[co]<-NULL
write.table(tax, here("Data", "tax.txt"), quote=FALSE, col.names=FALSE, sep="\t")

tax2 = read.table("tax_edited.txt", sep="\t", header=FALSE,row.names = 1)
colnames(tax2)[1] = "taxonomy"
otu<-t(as(otu_table(ps2),"matrix")) # 't' to transform if taxa_are_rows=FALSE
otu_biom<-make_biom(data=otu)
write_biom(otu_biom, here("Data", "otu_biom.biom"))


#run on command line: 

#(1)biom add-metadata -i otu_biom.biom -o table.w_smd.biom --sc-separated taxon --observation-metadata-fp tax_edited_good.txt  #(2) ./collapse_table.py -i  table.w_smd.biom  -o func_table.biom -g FAPROTAX.txt --collapse_by_metadata "taxonomy"  -v --out_group_overlaps overlaps --out_groups2records_table groups2records.txt 
#(3) biom convert -i func_table.biom  -o func_table.txt --to-tsv 

#categories to remove associated with less than 3 OTUs arsenate_detoxification arsenate_respiration	dissimilatory_arsenate_reduction	arsenite_oxidation_detoxification dissimilatory_arsenite_oxidation manganese_oxidation	manganese_respiration invertebrate_parasites	human_pathogens_septicemia	human_pathogens_pneumonia human_pathogens_gastroenteritis	human_pathogens_diarrhea anoxygenic_photoautotrophy_H2_oxidizing aerobic_anoxygenic_phototrophy

#categories removed for jaccard similarity methanol_oxidation nitrate_denitrification nitrite_denitrification nitrous_oxide_denitrification nitrite_ammonification human_gut anoxygenic_photoautotrophy_S_oxidizing photosynthetic_cyanobacteria nitrate_respiration


```

#functional ordination using faprotax output
```{r}
func = read.table(here("Data", "faprotax_filtered_foranalysis.csv"), sep=",", header=TRUE, row.names=1, check.names=FALSE)

funcp <- phyloseq(sample_data(sample_data(ps2)), otu_table(func, taxa_are_rows = FALSE))

pslog_all <- transform_sample_counts(funcp, function(x) log(1 + x))
out.bc.all <- ordinate(pslog_all, method = "MDS", distance = "bray")
evalsA <- out.bc.all$values$Eigenvalues
plot_ordination(pslog_all, out.bc.all, color = "Population", shape="type") +
  coord_fixed(sqrt(evalsA[2] / evalsA[1])) + geom_point(size=3) 


```

#Make Figure 4
```{r}

sample_data(ps2_filt)$baltic <- factor(get_variable(ps3, "Population") %in% c("Ystad", "Smygehuk" ))

pstree = ps3

sample_data(pstree) = sample_data(ps2_filt)

sample_data(pslog_all) = sample_data(ps2_filt)

p1 = plot_ordination(pstree, gp.pcoa, color='type') + geom_point(size=4) + stat_ellipse(type = "norm", linetype = 2)  + theme_bw()+ theme(legend.position="none")

p1

p2 = plot_ordination(pslog_all, out.bc.all, color = "type") +
  coord_fixed(sqrt(evalsA[2] / evalsA[1])) + geom_point(size=4) + 
  stat_ellipse(type = "norm", linetype = 2) + scale_color_hue(labels = c("Larvae", "Wrack")) + theme_bw() + labs(color="Type")

p2


ORD_DF <- 
plot_ordination(pstree, gp.pcoa, justDF = TRUE)


p3 <- ggplot(ORD_DF, aes(x=Axis.1, y = Axis.2, colour = Population, shape=type))+
  geom_point(size=4) + 
  stat_ellipse(aes(group = baltic, linetype = baltic), type = "norm", colour = "grey") + theme_bw() +
  theme(legend.position="none") + labs(x="Axis.1 [27.7%]",y="Axis.2 [14.7%]")

p3

ORD_df2 <- plot_ordination(pslog_all, out.bc.all, justDF = TRUE) 

p4 <- ggplot(ORD_df2, aes(x=Axis.1, y = Axis.2, colour = Population, shape=type))+
  coord_fixed(sqrt(evalsA[2] / evalsA[1]))+
  geom_point(size=4) + 
  stat_ellipse(aes(group = baltic, linetype = baltic), type = "norm", colour = "grey")+
  scale_linetype_discrete(name = "Location", labels = c("North Sea", "Baltic"))+
  scale_colour_discrete(name = "Site",labels=c("Skeie", "Justøya", "Magnarp", "Smygehuk", "Ystad"))+
  scale_shape_discrete(name = "Type") + theme_bw()  + labs(x="Axis.1 [38.8%]",y="Axis.2 [22.5%]")
p4

pdf("Fig_2.pdf",width=12,height=8)
plot_grid(plot_grid(p1,p2,p3,p4,nrow=2,align="vh",rel_widths = c(1, 1.88),labels="AUTO",label_x=c(0,0.17), label_size=18 ),ncol = 1, align = "vh")
dev.off()
```


#Perform a PERMDISP with functional ordination
```{r}
library(vegan)

vegan_otu <- function(physeq) {
  OTU <- otu_table(physeq)
  if (taxa_are_rows(OTU)) {
    OTU <- t(OTU)
  }
  return(as(OTU, "matrix"))
}

otu_a = vegan_otu(pslog_all)

sam_dataa = sample_data(ps2_filt)

vare.disa <- vegdist(otu_a)


#MANOVA
info = as.character(sam_dataa$type)
adonis(vare.disa ~ info,permutations=999,method="euclidean")


mod_type = betadisper(vare.disa, sam_dataa$type)
anova(mod_type)
permutest(mod_type, pairwise = TRUE, permutations = 5000)
(mod.HSDt <- TukeyHSD(mod_type))
plot(mod.HSDt)
boxplot(mod_type, xlab="Type", notch=FALSE, )


mod_pop = betadisper(vare.disa, sam_dataa$Population)
anova(mod_pop)
permutest(mod_pop, pairwise = TRUE, permutations = 5000)
(mod.HSDp <- TukeyHSD(mod_pop))
plot(mod.HSDp)
boxplot(mod_pop, xlab="Population", notch=FALSE, )


mod_balt = betadisper(vare.disa, sam_dataa$Baltic)
anova(mod_balt)
permutest(mod_balt, pairwise = TRUE, permutations = 5000)
(mod.HSDp <- TukeyHSD(mod_balt))
plot(mod.HSDp)
boxplot(mod_balt, xlab="Baltic", notch=FALSE, )


```

#Perform a PERMDISP with Philr ordination
```{r}
otu_a = vegan_otu(pstree)

sam_dataa = sample_data(ps2_filt)

vare.disa <- vegdist(otu_a)

mod_type = betadisper(vare.disa, sam_dataa$type)
anova(mod_type)
permutest(mod_type, pairwise = TRUE)
(mod.HSDt <- TukeyHSD(mod_type))
plot(mod.HSDt)
boxplot(mod_type, xlab="Type", notch=FALSE, )


mod_pop = betadisper(vare.disa, sam_dataa$Population)
anova(mod_pop)
permutest(mod_pop, pairwise = TRUE)
(mod.HSDp <- TukeyHSD(mod_pop))
plot(mod.HSDp)
boxplot(mod_pop, xlab="Population", notch=FALSE, )


mod_balt = betadisper(vare.disa, sam_dataa$Baltic)
anova(mod_balt)
permutest(mod_balt, pairwise = TRUE)
(mod.HSDp <- TukeyHSD(mod_balt))
plot(mod.HSDp)
boxplot(mod_balt, xlab="Baltic", notch=FALSE, )
```


#Functional BarPlot
```{r}
func = read.csv(here("Data", "faprotax_filtered_forboxplot.csv"), row.names = 1)
func = as.matrix(func)

taxa = read.table(here("Data", "faketaxa_faprotax_barplot.txt"), header=TRUE,sep="\t", na.strings=c(""," ","NA"), row.names=1,
                  colClasses = c("factor", "factor", "factor", "factor", "factor", "factor", "factor"))
taxa= as.matrix(taxa)
func_phylo = phyloseq(sample_data(sample_data(ps3)), tax_table(taxa), otu_table(func, taxa_are_rows = FALSE))



set.seed(2814532)
funcR = rarefy_even_depth(func_phylo, sample.size = 6300)

sample_data(funcR)$population2 = sample_data(funcR)$Population
sample_data(funcR)$type2 = sample_data(funcR)$type
sample_data(funcR)$typepop <- paste(sample_data(funcR)$type2,sample_data(funcR)$population2)


funcRm = merge_samples(funcR, "typepop")
sample_data(funcRm)$pop = c("Justoya", "Magnarp", "Skeie", "Smygehuk", "Ystad", "Justoya", "Magnarp", "Skeie", "Smygehuk", "Ystad")
sample_data(funcRm)$typ = c("Larvae","Larvae","Larvae","Larvae","Larvae", "Wrack", "Wrack", "Wrack", "Wrack", "Wrack")

sample_data(funcRm)$pop = factor(rev(unique(sample_data(funcRm)$pop)), levels=c("Skeie", "Justoya", "Magnarp", "Smygehuk", "Ystad"), labels = c("Skeie","Justøya" ,"Magnarp", "Smygehuk", "Ystad"))
levels(sample_data(funcRm)$type) = c("Larvae", "Wrack")
funcra = transform_sample_counts(funcRm, function(x) x / sum(x))  


pdf("func_bar_plot.pdf",width=18,height=8)
plot_bar(funcra, x="typ", fill="Phylum") + facet_wrap(~pop) + coord_flip() + 
  ylab("Proportion of Sequences") +  
  geom_bar(aes( fill=Phylum), stat="identity", color="black", position="stack") +
  labs(y = "Proportion of Sequences", x = "Sample Type", color = "Function", fill="Function")  + theme_bw()+ theme(
    axis.text.x = element_text(size=18), 
    axis.title.x = element_text(size=20), 
    axis.text.y = element_text(size=18), 
    axis.title.y= element_text(size=20), 
    legend.text= element_text(size=18), 
    legend.title= element_text(size=20),
    strip.text= element_text(size=24, margin = margin(.1, 0, .1, 0, "cm"))) 
dev.off()

```


#look at overall prevalance
```{r}
#load in prevalance data
prev = read.table(here("Data", "overall_prevalance.csv"), header=TRUE)

pdf("prevalance_histogram.pdf",width=10,height=6)
ggplot(prev, aes(x=Overall_prevalance)) + 
 geom_histogram(aes(y=..density..), colour="black", fill="white", binwidth = 0.02)+
 geom_density(alpha=.2, fill="#FF6666") + labs(x = "Prevalance", y = "Density") + theme_bw()
dev.off()
```



#Run ANCOM
```{r}
ps2_bact = subset_taxa(ps2_filt, Class=="Bacteroidia")
OrdertoKeep = c("Bacteroidales","Chitinophagales", "Cytophagales","Flavobacteriales")
ps2_bact_filt = subset_taxa(ps2_bact, Order %in% OrdertoKeep)

ps2_prot = subset_taxa(ps2_filt, Phylum=="Proteobacteria")
OrdertoKeep = c("Burkholderiales","Enterobacterales", "Legionellales","Pseudomonadales","Rhizobiales", "Rhodobacterales","Sphingomonadales","Xanthomonadales"
                )
ps2_prot_filt = subset_taxa(ps2_prot, Order %in% OrdertoKeep)


phylum_data <- tax_glom(ps2_filt, "Phylum")
bOrder_data = tax_glom(ps2_bact_filt, "Order")
pOrder_data = tax_glom(ps2_prot_filt, "Order")

# zero_cut seems to have replaced prev_cut with a default of 0.9 (1-0.1)
out = ancombc(phyloseq = phylum_data, formula = "type + Baltic + type*Baltic",  p_adj_method = "holm", zero_cut = 0.90,  struc_zero = FALSE, neg_lb = FALSE, tol = 1e-5,  max_iter = 100, conserve = TRUE, alpha = 0.05, global = FALSE)

res = out$res
tab_p = res$p_val
rownames(tab_p)=tax_table(phylum_data)[,2]

#lfc is now called beta?
ancom_da = out
ancom_res_df <- data.frame(
  Species = tax_table(phylum_data)[,2],
  beta = unlist(ancom_da$res$beta),
  se = unlist(ancom_da$res$se),
  W = unlist(ancom_da$res$W),
  p_val = unlist(ancom_da$res$p_val),
  q_val = unlist(ancom_da$res$q_val),
  diff_abn = unlist(ancom_da$res$diff_abn))


write.table(ancom_res_df, here("Data", "phyla.csv"))


out = ancombc(phyloseq = bOrder_data, formula = "type + Baltic + type*Baltic",  p_adj_method = "holm", zero_cut = 0.90, struc_zero = FALSE, neg_lb = FALSE, tol = 1e-5,  max_iter = 100, conserve = TRUE, alpha = 0.05, global = FALSE)

res = out$res
tab_p = res$p_val
rownames(tab_p)=tax_table(bOrder_data)[,4]

ancom_da = out
ancom_res_df <- data.frame(
  Species = tax_table(bOrder_data)[,4],
 beta = unlist(ancom_da$res$beta),
  se = unlist(ancom_da$res$se),
  W = unlist(ancom_da$res$W),
  p_val = unlist(ancom_da$res$p_val),
  q_val = unlist(ancom_da$res$q_val),
  diff_abn = unlist(ancom_da$res$diff_abn))


write.table(ancom_res_df, here("Data", "bact_order.csv"))

out = ancombc(phyloseq = pOrder_data, formula = "type + Baltic + type*Baltic",  p_adj_method = "holm", zero_cut = 0.90,  struc_zero = FALSE, neg_lb = FALSE, tol = 1e-5,  max_iter = 100, conserve = TRUE, alpha = 0.05, global = FALSE)

res = out$res
tab_p = res$p_val
rownames(tab_p)=tax_table(pOrder_data)[,4]

ancom_da = out
ancom_res_df <- data.frame(
  Species = tax_table(pOrder_data)[,4],
  beta = unlist(ancom_da$res$beta),
  se = unlist(ancom_da$res$se),
  W = unlist(ancom_da$res$W),
  p_val = unlist(ancom_da$res$p_val),
  q_val = unlist(ancom_da$res$q_val),
  diff_abn = unlist(ancom_da$res$diff_abn))

write.table(ancom_res_df, here("Data", "prot_order.csv"))

```

